{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5743b630",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Execute this cell to install dependencies\n",
    "%pip install sf-hamilton[visualization]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8c6ee201",
   "metadata": {},
   "source": [
    "# Parallelism: file processing [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/parallelism/file_processing/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/parallelism/file_processing/notebook.ipynb)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c32082e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/elijahbenizzy/.pyenv/versions/3.9.10/envs/hamilton/lib/python3.9/site-packages/pyspark/pandas/__init__.py:50: UserWarning: 'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import matplotlib\n",
    "import pandas as pd\n",
    "import aggregate_data, list_data, process_data\n",
    "from hamilton import driver\n",
    "from hamilton.execution import executors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "bf2fe2e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9942b8dd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/apache/hamilton#usage-analytics--data-privacy for details.\n"
     ]
    }
   ],
   "source": [
    "dr = driver.Builder(). \\\n",
    "        enable_dynamic_execution(allow_experimental_mode=True) \\\n",
    "        .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=100)) \\\n",
    "        .with_local_executor(executors.SynchronousLocalTaskExecutor()) \\\n",
    "        .with_modules(aggregate_data, list_data, process_data) \\\n",
    "        .build()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "784fe5da",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n",
       " -->\n",
       "<!-- Pages: 1 -->\n",
       "<svg width=\"888pt\" height=\"852pt\"\n",
       " viewBox=\"0.00 0.00 887.75 852.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 848)\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-848 883.75,-848 883.75,4 -4,4\"/>\n",
       "<!-- person_capacity -->\n",
       "<g id=\"node1\" class=\"node\">\n",
       "<title>person_capacity</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"180.11\" cy=\"-242\" rx=\"70.8\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"180.11\" y=\"-236.95\" font-family=\"Times,serif\" font-size=\"14.00\">person_capacity</text>\n",
       "</g>\n",
       "<!-- mean_price_per_capacity -->\n",
       "<g id=\"node3\" class=\"node\">\n",
       "<title>mean_price_per_capacity</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"106.11\" cy=\"-170\" rx=\"106.11\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"106.11\" y=\"-164.95\" font-family=\"Times,serif\" font-size=\"14.00\">mean_price_per_capacity</text>\n",
       "</g>\n",
       "<!-- person_capacity&#45;&gt;mean_price_per_capacity -->\n",
       "<g id=\"edge4\" class=\"edge\">\n",
       "<title>person_capacity&#45;&gt;mean_price_per_capacity</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M162.58,-224.41C153.47,-215.8 142.17,-205.1 132.07,-195.55\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"134.97,-193.53 125.3,-189.2 130.16,-198.62 134.97,-193.53\"/>\n",
       "</g>\n",
       "<!-- realSum -->\n",
       "<g id=\"node2\" class=\"node\">\n",
       "<title>realSum</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"332.11\" cy=\"-242\" rx=\"42.14\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"332.11\" y=\"-236.95\" font-family=\"Times,serif\" font-size=\"14.00\">realSum</text>\n",
       "</g>\n",
       "<!-- realSum&#45;&gt;mean_price_per_capacity -->\n",
       "<g id=\"edge3\" class=\"edge\">\n",
       "<title>realSum&#45;&gt;mean_price_per_capacity</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M298.77,-230.67C263.9,-219.87 208.46,-202.7 165.98,-189.54\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"167.07,-185.91 156.48,-186.29 165,-192.59 167.07,-185.91\"/>\n",
       "</g>\n",
       "<!-- max_price -->\n",
       "<g id=\"node4\" class=\"node\">\n",
       "<title>max_price</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"280.11\" cy=\"-170\" rx=\"50.33\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"280.11\" y=\"-164.95\" font-family=\"Times,serif\" font-size=\"14.00\">max_price</text>\n",
       "</g>\n",
       "<!-- realSum&#45;&gt;max_price -->\n",
       "<g id=\"edge5\" class=\"edge\">\n",
       "<title>realSum&#45;&gt;max_price</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M319.79,-224.41C313.51,-215.95 305.74,-205.49 298.74,-196.08\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"301.12,-194.41 292.35,-188.47 295.5,-198.58 301.12,-194.41\"/>\n",
       "</g>\n",
       "<!-- mean_price -->\n",
       "<g id=\"node12\" class=\"node\">\n",
       "<title>mean_price</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"403.11\" cy=\"-170\" rx=\"54.42\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"403.11\" y=\"-164.95\" font-family=\"Times,serif\" font-size=\"14.00\">mean_price</text>\n",
       "</g>\n",
       "<!-- realSum&#45;&gt;mean_price -->\n",
       "<g id=\"edge13\" class=\"edge\">\n",
       "<title>realSum&#45;&gt;mean_price</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M348.23,-225.12C357.27,-216.2 368.72,-204.91 378.83,-194.94\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"380.91,-197.82 385.57,-188.31 376,-192.84 380.91,-197.82\"/>\n",
       "</g>\n",
       "<!-- statistics -->\n",
       "<g id=\"node23\" class=\"node\">\n",
       "<title>statistics</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"490.11\" cy=\"-98\" rx=\"42.65\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"490.11\" y=\"-92.95\" font-family=\"Times,serif\" font-size=\"14.00\">statistics</text>\n",
       "</g>\n",
       "<!-- mean_price_per_capacity&#45;&gt;statistics -->\n",
       "<g id=\"edge26\" class=\"edge\">\n",
       "<title>mean_price_per_capacity&#45;&gt;statistics</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M176.55,-156.16C253.15,-142.2 373.62,-120.24 440.75,-108\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"441.24,-111.29 450.45,-106.05 439.98,-104.4 441.24,-111.29\"/>\n",
       "</g>\n",
       "<!-- max_price&#45;&gt;statistics -->\n",
       "<g id=\"edge28\" class=\"edge\">\n",
       "<title>max_price&#45;&gt;statistics</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M315.74,-157.13C352.15,-144.99 408.72,-126.13 447.49,-113.21\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"448.05,-116.38 456.43,-109.89 445.84,-109.74 448.05,-116.38\"/>\n",
       "</g>\n",
       "<!-- load_data.weekday_data.wkd_data -->\n",
       "<g id=\"node5\" class=\"node\">\n",
       "<title>load_data.weekday_data.wkd_data</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"401.11\" cy=\"-530\" rx=\"140.41\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"401.11\" y=\"-524.95\" font-family=\"Times,serif\" font-size=\"14.00\">load_data.weekday_data.wkd_data</text>\n",
       "</g>\n",
       "<!-- wkd_data -->\n",
       "<g id=\"node20\" class=\"node\">\n",
       "<title>wkd_data</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"480.11\" cy=\"-458\" rx=\"46.75\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"480.11\" y=\"-452.95\" font-family=\"Times,serif\" font-size=\"14.00\">wkd_data</text>\n",
       "</g>\n",
       "<!-- load_data.weekday_data.wkd_data&#45;&gt;wkd_data -->\n",
       "<g id=\"edge21\" class=\"edge\">\n",
       "<title>load_data.weekday_data.wkd_data&#45;&gt;wkd_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M420.64,-511.7C430.77,-502.73 443.26,-491.65 454.18,-481.98\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"456,-485.16 461.16,-475.91 451.36,-479.92 456,-485.16\"/>\n",
       "</g>\n",
       "<!-- guest_satisfaction_overall -->\n",
       "<g id=\"node6\" class=\"node\">\n",
       "<title>guest_satisfaction_overall</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"551.11\" cy=\"-242\" rx=\"107.14\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"551.11\" y=\"-236.95\" font-family=\"Times,serif\" font-size=\"14.00\">guest_satisfaction_overall</text>\n",
       "</g>\n",
       "<!-- mean_guest_satisfaction -->\n",
       "<g id=\"node13\" class=\"node\">\n",
       "<title>mean_guest_satisfaction</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"577.11\" cy=\"-170\" rx=\"102.02\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"577.11\" y=\"-164.95\" font-family=\"Times,serif\" font-size=\"14.00\">mean_guest_satisfaction</text>\n",
       "</g>\n",
       "<!-- guest_satisfaction_overall&#45;&gt;mean_guest_satisfaction -->\n",
       "<g id=\"edge14\" class=\"edge\">\n",
       "<title>guest_satisfaction_overall&#45;&gt;mean_guest_satisfaction</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M557.54,-223.7C560.38,-216.07 563.77,-206.92 566.94,-198.4\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"570.56,-199.7 570.76,-189.1 564,-197.26 570.56,-199.7\"/>\n",
       "</g>\n",
       "<!-- cleanliness_rating -->\n",
       "<g id=\"node7\" class=\"node\">\n",
       "<title>cleanliness_rating</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"766.11\" cy=\"-242\" rx=\"77.45\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"766.11\" y=\"-236.95\" font-family=\"Times,serif\" font-size=\"14.00\">cleanliness_rating</text>\n",
       "</g>\n",
       "<!-- mean_cleanliness -->\n",
       "<g id=\"node10\" class=\"node\">\n",
       "<title>mean_cleanliness</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"773.11\" cy=\"-170\" rx=\"76.43\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"773.11\" y=\"-164.95\" font-family=\"Times,serif\" font-size=\"14.00\">mean_cleanliness</text>\n",
       "</g>\n",
       "<!-- cleanliness_rating&#45;&gt;mean_cleanliness -->\n",
       "<g id=\"edge11\" class=\"edge\">\n",
       "<title>cleanliness_rating&#45;&gt;mean_cleanliness</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M767.85,-223.7C768.59,-216.24 769.48,-207.32 770.32,-198.97\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"773.89,-199.4 771.4,-189.1 766.93,-198.71 773.89,-199.4\"/>\n",
       "</g>\n",
       "<!-- weekend_data -->\n",
       "<g id=\"node8\" class=\"node\">\n",
       "<title>weekend_data</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"674.11\" cy=\"-386\" rx=\"63.63\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"674.11\" y=\"-380.95\" font-family=\"Times,serif\" font-size=\"14.00\">weekend_data</text>\n",
       "</g>\n",
       "<!-- all_data -->\n",
       "<g id=\"node14\" class=\"node\">\n",
       "<title>all_data</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"544.11\" cy=\"-314\" rx=\"40.09\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"544.11\" y=\"-308.95\" font-family=\"Times,serif\" font-size=\"14.00\">all_data</text>\n",
       "</g>\n",
       "<!-- weekend_data&#45;&gt;all_data -->\n",
       "<g id=\"edge16\" class=\"edge\">\n",
       "<title>weekend_data&#45;&gt;all_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M645.59,-369.64C625.84,-359.01 599.46,-344.8 578.55,-333.54\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"580.53,-330.1 570.07,-328.44 577.21,-336.26 580.53,-330.1\"/>\n",
       "</g>\n",
       "<!-- city_data -->\n",
       "<g id=\"node9\" class=\"node\">\n",
       "<title>city_data</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"704.11\" cy=\"-678\" rx=\"44.7\" ry=\"18\"/>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"704.11\" cy=\"-678\" rx=\"48.7\" ry=\"22\"/>\n",
       "<text text-anchor=\"middle\" x=\"704.11\" y=\"-672.95\" font-family=\"Times,serif\" font-size=\"14.00\">city_data</text>\n",
       "</g>\n",
       "<!-- weekday_file -->\n",
       "<g id=\"node17\" class=\"node\">\n",
       "<title>weekday_file</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"483.11\" cy=\"-602\" rx=\"60.56\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"483.11\" y=\"-596.95\" font-family=\"Times,serif\" font-size=\"14.00\">weekday_file</text>\n",
       "</g>\n",
       "<!-- city_data&#45;&gt;weekday_file -->\n",
       "<g id=\"edge18\" class=\"edge\">\n",
       "<title>city_data&#45;&gt;weekday_file</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M665.62,-664.11C628.41,-651.65 571.91,-632.73 531.64,-619.25\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"531.77,-619.29 523.71,-611.85 527.02,-617.7 522.28,-616.12 522.28,-616.12 522.28,-616.12 527.02,-617.7 520.85,-620.38 531.77,-619.29 531.77,-619.29\"/>\n",
       "</g>\n",
       "<!-- weekend_file -->\n",
       "<g id=\"node19\" class=\"node\">\n",
       "<title>weekend_file</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"704.11\" cy=\"-602\" rx=\"60.56\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"704.11\" y=\"-596.95\" font-family=\"Times,serif\" font-size=\"14.00\">weekend_file</text>\n",
       "</g>\n",
       "<!-- city_data&#45;&gt;weekend_file -->\n",
       "<g id=\"edge20\" class=\"edge\">\n",
       "<title>city_data&#45;&gt;weekend_file</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M704.11,-655.6C704.11,-647.58 704.11,-638.38 704.11,-629.93\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"704.11,-630.13 708.61,-620.13 704.11,-625.13 704.11,-620.13 704.11,-620.13 704.11,-620.13 704.11,-625.13 699.61,-620.13 704.11,-630.13 704.11,-630.13\"/>\n",
       "</g>\n",
       "<!-- city_data&#45;&gt;statistics -->\n",
       "<g id=\"edge29\" class=\"edge\">\n",
       "<title>city_data&#45;&gt;statistics</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M742.51,-664.08C793.48,-644.21 877.11,-600.96 877.11,-531 877.11,-531 877.11,-531 877.11,-241 877.11,-200.55 887.66,-179.62 858.11,-152 813.46,-110.26 633.45,-101.27 542.81,-99.42\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"543,-99.42 533.09,-94.74 538,-99.33 533,-99.24 533,-99.24 533,-99.24 538,-99.33 532.92,-103.74 543,-99.42 543,-99.42\"/>\n",
       "</g>\n",
       "<!-- mean_cleanliness&#45;&gt;statistics -->\n",
       "<g id=\"edge27\" class=\"edge\">\n",
       "<title>mean_cleanliness&#45;&gt;statistics</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M721.87,-156.32C669.08,-143.27 587.52,-123.09 536.56,-110.49\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"537.72,-106.92 527.17,-107.92 536.04,-113.72 537.72,-106.92\"/>\n",
       "</g>\n",
       "<!-- weekday_data -->\n",
       "<g id=\"node11\" class=\"node\">\n",
       "<title>weekday_data</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"519.11\" cy=\"-386\" rx=\"63.63\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"519.11\" y=\"-380.95\" font-family=\"Times,serif\" font-size=\"14.00\">weekday_data</text>\n",
       "</g>\n",
       "<!-- weekday_data&#45;&gt;all_data -->\n",
       "<g id=\"edge15\" class=\"edge\">\n",
       "<title>weekday_data&#45;&gt;all_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M525.29,-367.7C528.02,-360.07 531.29,-350.92 534.33,-342.4\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"537.94,-343.7 538.01,-333.1 531.35,-341.34 537.94,-343.7\"/>\n",
       "</g>\n",
       "<!-- mean_price&#45;&gt;statistics -->\n",
       "<g id=\"edge25\" class=\"edge\">\n",
       "<title>mean_price&#45;&gt;statistics</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M422.86,-153.12C434.59,-143.68 449.64,-131.56 462.54,-121.19\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"464.45,-123.34 470.05,-114.34 460.06,-117.88 464.45,-123.34\"/>\n",
       "</g>\n",
       "<!-- mean_guest_satisfaction&#45;&gt;statistics -->\n",
       "<g id=\"edge24\" class=\"edge\">\n",
       "<title>mean_guest_satisfaction&#45;&gt;statistics</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M556.05,-152.05C544.45,-142.72 529.91,-131.02 517.44,-120.99\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"520.19,-117.9 510.2,-114.36 515.8,-123.35 520.19,-117.9\"/>\n",
       "</g>\n",
       "<!-- all_data&#45;&gt;person_capacity -->\n",
       "<g id=\"edge1\" class=\"edge\">\n",
       "<title>all_data&#45;&gt;person_capacity</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M507.88,-306.03C446.83,-294.29 322.61,-270.4 245.85,-255.64\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"246.83,-252.07 236.35,-253.62 245.51,-258.95 246.83,-252.07\"/>\n",
       "</g>\n",
       "<!-- all_data&#45;&gt;realSum -->\n",
       "<g id=\"edge2\" class=\"edge\">\n",
       "<title>all_data&#45;&gt;realSum</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M512.38,-302.52C475.9,-290.48 415.85,-270.65 375.18,-257.22\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"376.36,-253.59 365.77,-253.78 374.17,-260.24 376.36,-253.59\"/>\n",
       "</g>\n",
       "<!-- all_data&#45;&gt;guest_satisfaction_overall -->\n",
       "<g id=\"edge7\" class=\"edge\">\n",
       "<title>all_data&#45;&gt;guest_satisfaction_overall</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M545.85,-295.7C546.59,-288.24 547.48,-279.32 548.32,-270.97\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"551.89,-271.4 549.4,-261.1 544.93,-270.71 551.89,-271.4\"/>\n",
       "</g>\n",
       "<!-- all_data&#45;&gt;cleanliness_rating -->\n",
       "<g id=\"edge8\" class=\"edge\">\n",
       "<title>all_data&#45;&gt;cleanliness_rating</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M576.4,-302.82C611.65,-291.71 668.73,-273.71 711.08,-260.35\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"712.03,-263.41 720.51,-257.06 709.92,-256.73 712.03,-263.41\"/>\n",
       "</g>\n",
       "<!-- data_dir -->\n",
       "<g id=\"node15\" class=\"node\">\n",
       "<title>data_dir</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"704.11\" cy=\"-826\" rx=\"65.68\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"704.11\" y=\"-820.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: data_dir</text>\n",
       "</g>\n",
       "<!-- files -->\n",
       "<g id=\"node18\" class=\"node\">\n",
       "<title>files</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"704.11\" cy=\"-754\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"704.11\" y=\"-748.95\" font-family=\"Times,serif\" font-size=\"14.00\">files</text>\n",
       "</g>\n",
       "<!-- data_dir&#45;&gt;files -->\n",
       "<g id=\"edge19\" class=\"edge\">\n",
       "<title>data_dir&#45;&gt;files</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M704.11,-807.7C704.11,-800.24 704.11,-791.32 704.11,-782.97\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"707.61,-783.1 704.11,-773.1 700.61,-783.1 707.61,-783.1\"/>\n",
       "</g>\n",
       "<!-- load_data.weekend_data.wknd_data -->\n",
       "<g id=\"node16\" class=\"node\">\n",
       "<title>load_data.weekend_data.wknd_data</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"704.11\" cy=\"-530\" rx=\"145.01\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"704.11\" y=\"-524.95\" font-family=\"Times,serif\" font-size=\"14.00\">load_data.weekend_data.wknd_data</text>\n",
       "</g>\n",
       "<!-- wknd_data -->\n",
       "<g id=\"node22\" class=\"node\">\n",
       "<title>wknd_data</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"684.11\" cy=\"-458\" rx=\"51.35\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"684.11\" y=\"-452.95\" font-family=\"Times,serif\" font-size=\"14.00\">wknd_data</text>\n",
       "</g>\n",
       "<!-- load_data.weekend_data.wknd_data&#45;&gt;wknd_data -->\n",
       "<g id=\"edge23\" class=\"edge\">\n",
       "<title>load_data.weekend_data.wknd_data&#45;&gt;wknd_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M699.17,-511.7C697.02,-504.15 694.43,-495.12 692.02,-486.68\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"695.11,-485.76 689,-477.1 688.38,-487.68 695.11,-485.76\"/>\n",
       "</g>\n",
       "<!-- weekday_file&#45;&gt;load_data.weekday_data.wkd_data -->\n",
       "<g id=\"edge6\" class=\"edge\">\n",
       "<title>weekday_file&#45;&gt;load_data.weekday_data.wkd_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M464.1,-584.76C453.74,-575.92 440.73,-564.82 429.23,-555\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"432.05,-551.95 422.17,-548.12 427.51,-557.28 432.05,-551.95\"/>\n",
       "</g>\n",
       "<!-- files&#45;&gt;city_data -->\n",
       "<g id=\"edge10\" class=\"edge\">\n",
       "<title>files&#45;&gt;city_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M704.11,-735.84C704.11,-728.47 704.11,-719.62 704.11,-711.12\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"707.61,-711.3 704.11,-701.3 700.61,-711.3 707.61,-711.3\"/>\n",
       "</g>\n",
       "<!-- weekend_file&#45;&gt;load_data.weekend_data.wknd_data -->\n",
       "<g id=\"edge17\" class=\"edge\">\n",
       "<title>weekend_file&#45;&gt;load_data.weekend_data.wknd_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M704.11,-583.7C704.11,-576.24 704.11,-567.32 704.11,-558.97\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"707.61,-559.1 704.11,-549.1 700.61,-559.1 707.61,-559.1\"/>\n",
       "</g>\n",
       "<!-- wkd_data&#45;&gt;weekday_data -->\n",
       "<g id=\"edge12\" class=\"edge\">\n",
       "<title>wkd_data&#45;&gt;weekday_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M489.56,-440.05C493.95,-432.18 499.27,-422.62 504.19,-413.79\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"507.68,-415.71 509.49,-405.28 501.57,-412.31 507.68,-415.71\"/>\n",
       "</g>\n",
       "<!-- statistics_by_city -->\n",
       "<g id=\"node21\" class=\"node\">\n",
       "<title>statistics_by_city</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"544.99,-40 435.24,-40 435.24,-4 544.99,-4 544.99,-40\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"548.99,-44 431.24,-44 431.24,0 548.99,0 548.99,-44\"/>\n",
       "<text text-anchor=\"middle\" x=\"490.11\" y=\"-16.95\" font-family=\"Times,serif\" font-size=\"14.00\">statistics_by_city</text>\n",
       "</g>\n",
       "<!-- wknd_data&#45;&gt;weekend_data -->\n",
       "<g id=\"edge9\" class=\"edge\">\n",
       "<title>wknd_data&#45;&gt;weekend_data</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M681.64,-439.7C680.58,-432.24 679.3,-423.32 678.11,-414.97\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"681.44,-414.51 676.56,-405.1 674.51,-415.5 681.44,-414.51\"/>\n",
       "</g>\n",
       "<!-- statistics&#45;&gt;statistics_by_city -->\n",
       "<g id=\"edge22\" class=\"edge\">\n",
       "<title>statistics&#45;&gt;statistics_by_city</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M490.11,-70.02C490.11,-65.12 490.11,-59.97 490.11,-54.95\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"490.11,-69.84 485.61,-79.84 490.11,-74.84 490.11,-79.84 490.11,-79.84 490.11,-79.84 490.11,-74.84 494.61,-79.84 490.11,-69.84 490.11,-69.84\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"493.61,-55.3 490.11,-45.3 486.61,-55.3 493.61,-55.3\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.graphs.Digraph at 0x103828970>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dr.visualize_execution([\"statistics_by_city\"], \"./dag\", {}, inputs={\"data_dir\" : \"data\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7851627f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = dr.execute([\"statistics_by_city\"], inputs={\"data_dir\" : \"data\"})[\"statistics_by_city\"]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eda60e6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"mean_guest_satisfaction\"]\\\n",
    "    .sort_values(ascending=False)\\\n",
    "    .plot(kind=\"bar\", title=\"Guest satisfaction by city\", ylim=(80, 100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35811a49",
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"mean_price\"]\\\n",
    "    .sort_values(ascending=False).plot(kind=\"bar\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f802d46",
   "metadata": {},
   "outputs": [],
   "source": [
    "s = df[\"mean_price\"]\n",
    "s.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ed91fb9",
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"mean_price_per_person\"]\\\n",
    "    .sort_values(ascending=False)\\\n",
    "    .plot(kind=\"bar\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5580877e",
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"cleanliness_ratings_mean\"]\\\n",
    "    .sort_values(ascending=False)\\\n",
    "    .plot(kind=\"bar\", ylim=(8,10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06b866a2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
